Skip to content

dash_charts.utils_data⚓︎

Helpers for building Dash applications.

View Source
"""Helpers for building Dash applications."""

import csv
import json
import sqlite3
import time
from contextlib import ContextDecorator
from datetime import datetime
from pathlib import Path

import pandas as pd
from cerberus import Validator

# ----------------------------------------------------------------------------------------------------------------------
# For Working with Data


def enable_verbose_pandas(max_columns=None, max_rows=None, max_seq_items=None):
    """Update global pandas configuration for printed dataframes.

    The options are set process-wide through `pd.set_option` and stay in effect until changed again.

    Args:
        max_columns: maximum number of columns to print (`display.max_columns`). Default is None (to show all)
        max_rows: maximum number of rows to print (`display.max_rows`). Default is None (to show all)
        max_seq_items: maximum number of items printed when pretty-printing a long sequence
            (`display.max_seq_items`). Default is None, which leaves the current pandas setting unchanged

    """
    # Show every column (or cap at `max_columns`) without wrapping lines or truncating cell contents
    pd.set_option('display.max_columns', max_columns)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

    # Optionally modify number of rows shown
    pd.set_option('display.max_rows', max_rows)
    if max_seq_items:
        # `pd.options` is an attribute namespace and cannot be called, so the value must go through `set_option`
        pd.set_option('display.max_seq_items', max_seq_items)


def append_df(df_old, df_new):
    """Stack a new dataframe below an existing one, tolerating a missing first dataframe.

    Useful for accumulating results in a loop where the accumulator starts as None.

    Args:
        df_old: dataframe or None
        df_new: new dataframe to append. Expects all columns to match

    Returns:
        dataframe: `df_new` itself when `df_old` is None, otherwise the concatenation with a fresh 0..n-1 index

    """
    if df_old is None:
        return df_new
    stacked = pd.concat([df_old, df_new])
    # Drop the source indexes so the combined rows are numbered consecutively
    return stacked.reset_index(drop=True)


def validate(document, schema, **validator_kwargs):
    """Check a data structure against a Cerberus schema and report what failed.

    Cerberus Documentation: https://docs.python-cerberus.org/en/stable/validation-rules.html

    Args:
        document: data structure to validate
        schema: expected structure
        validator_kwargs: additional keyword arguments for Validator class

    Returns:
        the `errors` attribute of the Validator after validation (see the Cerberus documentation for its structure)

    """
    checker = Validator(schema, **validator_kwargs)
    # Only the recorded errors are reported; the return value of `validate` is not used
    checker.validate(document)
    return checker.errors


def json_dumps_compact(data):   # noqa: CCR001
    """Format provided dictionary into compact JSON. Lists will be in one line rather than split on new lines.

    Only lists that are direct values of `data` are compacted; lists nested inside other values keep the
    standard indented layout. Each compacted list is produced by the JSON encoder, so the output is valid JSON
    even when the data contains apostrophes, brackets, backticks, booleans, or None.

    Args:
        data: JSON-serializable dictionary

    Returns:
        str: JSON-formatted string with lists compacted into a single line

    Raises:
        TypeError: if any key or value (including list items) is not JSON-serializable

    """
    separators = (',', ': ')
    # Extend the placeholder prefix until it provably appears nowhere in the serialized keys or values
    prefix = '@@list'
    serialized = json.dumps(data)
    while prefix in serialized:
        prefix += '@'

    clean_data = {}
    compact_lists = {}
    for index, (key, raw) in enumerate(data.items()):
        if isinstance(raw, list):
            placeholder = f'{prefix}{index}'
            # The quoted placeholder is what appears in the indented output and is swapped for the one-line list
            compact_lists[f'"{placeholder}"'] = json.dumps(raw, separators=separators)
            clean_data[key] = placeholder
        else:
            clean_data[key] = raw

    raw_json = json.dumps(clean_data, indent=4, separators=separators, sort_keys=True)
    for quoted_placeholder, compact_list in compact_lists.items():
        raw_json = raw_json.replace(quoted_placeholder, compact_list)
    return raw_json


def write_pretty_json(filename, obj):
    """Serialize an object as 4-space indented JSON and write it to a file.

    The file is written with `Path.write_text` using its default encoding, replacing any existing content.

    Args:
        filename: Path or plain string filename to write (should end with `.json`)
        obj: JSON object to write

    """
    target = Path(filename)
    # Serialization completes before the file is opened, so a failure here does not touch the file
    pretty = json.dumps(obj, indent=4, separators=(',', ': '))
    target.write_text(pretty)


def write_csv(csv_path, rows):
    """Write rows to a comma-delimited, UTF-8 encoded CSV file.

    The file is opened with `newline='\\n'`, so Python does not translate the line terminators emitted by
    `csv.writer`. Fields are quoted only when required (`csv.QUOTE_MINIMAL`).

    Args:
        csv_path: path to CSV file
        rows: list of lists to write to CSV file

    """
    with open(csv_path, 'w', newline='\n', encoding='utf-8') as handle:
        csv.writer(handle, delimiter=',', quoting=csv.QUOTE_MINIMAL).writerows(rows)


# ----------------------------------------------------------------------------------------------------------------------
# Time Helpers

# Format strings written with `datetime.strftime`/`strptime` directives; each is followed by a string describing it
US_TIME_FORMAT = '%m/%d/%Y %H:%M:%S'
"""String time format with month/year (MM/DD/YYYY HH:MM:SS)."""

DASHED_TIME_FORMAT_US = '%m-%d-%Y %H:%M:%S'
"""Dashed time format with month first (MM-DD-YYYY HH:MM:SS)."""

DASHED_TIME_FORMAT_YEAR = '%Y-%m-%d %H:%M:%S'
"""Dashed time format with year first (YYYY-MM-DD HH:MM:SS)."""

# No separator between hours, minutes, and seconds so the value contains no `:` characters
TIME_FORMAT_FILE = '%Y-%m-%d_%H%M%S'
"""Filename-safe time format with year first (YYYY-MM-DD_HHMMSS)."""

# `%b` is the abbreviated month name (the "MMM" in the description below)
GDP_TIME_FORMAT = '%d%b%Y %H:%M:%S'
"""Good Documentation Practice time format (DDMMMYYYY HH:MM:SS)."""


def get_unix(str_ts, date_format):
    """Parse a string timestamp in date_format and convert it to a unix timestamp.

    When `date_format` carries no UTC offset, the parsed datetime is naive and `datetime.timestamp()`
    interprets it in the local timezone.

    Args:
        str_ts: string timestamp in `date_format`
        date_format: datetime time stamp format

    Returns:
        float: unix timestamp in seconds

    """
    parsed = datetime.strptime(str_ts, date_format)
    return parsed.timestamp()


def format_unix(unix_ts, date_format):
    """Render a unix timestamp as a string in date_format.

    The timestamp is converted with `datetime.fromtimestamp`, i.e. to local time.

    Args:
        unix_ts: unix timestamp
        date_format: datetime time stamp format

    Returns:
        string: formatted timestamp in `date_format`

    """
    moment = datetime.fromtimestamp(unix_ts)
    return moment.strftime(date_format)


# ----------------------------------------------------------------------------------------------------------------------
# General SQL


def uniq_table_id():
    """Return a table ID built from the current time in nanoseconds.

    Uniqueness relies on the clock value differing between calls.

    Returns:
        str: in format `U<timestamp_ns>`

    """
    return 'U' + str(time.time_ns())


# ----------------------------------------------------------------------------------------------------------------------
# sqlite3


class SQLConnection(ContextDecorator):
    """Open a SQLite connection on entry and close it on exit.

    Usable as a `with` block or, through `ContextDecorator`, as a function decorator. Closing happens without
    any commit being issued here, so callers must commit their own changes before leaving the block.
    """

    def __init__(self, db_path):
        """Store the database location; no connection is opened yet.

        Args:
            db_path: Path to a SQLite file

        """
        self.db_path = db_path
        self.conn = None

    def __enter__(self):
        """Open the database and keep a reference for `__exit__`.

        Returns:
            the connection object returned by `sqlite3.connect`

        """
        connection = sqlite3.connect(self.db_path)
        self.conn = connection
        return connection

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection opened in `__enter__`; exceptions from the block are not suppressed."""  # noqa: DAR101
        self.conn.close()


def list_sql_tables(db_path):
    """Return all table names from the SQL database.

    Args:
        db_path: path to SQLite database file

    Returns:
        list: of unique table names in the SQL database

    """
    with SQLConnection(db_path) as conn:
        cursor = conn.cursor()
        # SQL string literals take single quotes; a double-quoted "table" is an identifier that SQLite only
        # treats as a string through a legacy fallback, which can be disabled at compile time
        cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        # Each fetched row is a 1-tuple holding the table name
        return [names[0] for names in cursor.fetchall()]

Variables⚓︎

DASHED_TIME_FORMAT_US

Dashed time format with month first (MM-DD-YYYY HH:MM:SS).

DASHED_TIME_FORMAT_YEAR

Dashed time format with year first (YYYY-MM-DD HH:MM:SS).

GDP_TIME_FORMAT

Good Documentation Practice time format (DDMMMYYYY HH:MM:SS).

TIME_FORMAT_FILE

Filename-safe time format with year first (YYYY-MM-DD_HHMMSS).

US_TIME_FORMAT

String time format with month/year (MM/DD/YYYY HH:MM:SS).

Functions⚓︎

append_df⚓︎

def append_df(
    df_old,
    df_new
)

Handle appending a dataframe if the old_df is None. Useful for iteration.

Parameters:

Name Description
df_old dataframe or None
df_new new dataframe to append. Expects all columns to match

Returns:

Type Description
dataframe combined dataframe
View Source
def append_df(df_old, df_new):
    """Stack a new dataframe below an existing one, tolerating a missing first dataframe.

    Useful for accumulating results in a loop where the accumulator starts as None.

    Args:
        df_old: dataframe or None
        df_new: new dataframe to append. Expects all columns to match

    Returns:
        dataframe: `df_new` itself when `df_old` is None, otherwise the concatenation with a fresh 0..n-1 index

    """
    if df_old is None:
        return df_new
    stacked = pd.concat([df_old, df_new])
    # Drop the source indexes so the combined rows are numbered consecutively
    return stacked.reset_index(drop=True)

enable_verbose_pandas⚓︎

def enable_verbose_pandas(
    max_columns=None,
    max_rows=None,
    max_seq_items=None
)

Update global pandas configuration for printed dataframes.

Parameters:

Name Description
max_columns the number of max columns. Default is None (to show all)
max_rows the number of max rows. Default is None (to show all)
max_seq_items the maximum number of items printed when pretty-printing a long sequence (pandas display.max_seq_items). Default is None (leave the current pandas setting unchanged)
View Source
def enable_verbose_pandas(max_columns=None, max_rows=None, max_seq_items=None):
    """Update global pandas configuration for printed dataframes.

    The options are set process-wide through `pd.set_option` and stay in effect until changed again.

    Args:
        max_columns: maximum number of columns to print (`display.max_columns`). Default is None (to show all)
        max_rows: maximum number of rows to print (`display.max_rows`). Default is None (to show all)
        max_seq_items: maximum number of items printed when pretty-printing a long sequence
            (`display.max_seq_items`). Default is None, which leaves the current pandas setting unchanged

    """
    # Show every column (or cap at `max_columns`) without wrapping lines or truncating cell contents
    pd.set_option('display.max_columns', max_columns)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

    # Optionally modify number of rows shown
    pd.set_option('display.max_rows', max_rows)
    if max_seq_items:
        # `pd.options` is an attribute namespace and cannot be called, so the value must go through `set_option`
        pd.set_option('display.max_seq_items', max_seq_items)

format_unix⚓︎

def format_unix(
    unix_ts,
    date_format
)

Format unix timestamp as a string timestamp in date_format.

Parameters:

Name Description
unix_ts unix timestamp
date_format datetime time stamp format

Returns:

Type Description
string formatted timestamp in date_format
View Source
def format_unix(unix_ts, date_format):
    """Render a unix timestamp as a string in date_format.

    The timestamp is converted with `datetime.fromtimestamp`, i.e. to local time.

    Args:
        unix_ts: unix timestamp
        date_format: datetime time stamp format

    Returns:
        string: formatted timestamp in `date_format`

    """
    moment = datetime.fromtimestamp(unix_ts)
    return moment.strftime(date_format)

get_unix⚓︎

def get_unix(
    str_ts,
    date_format
)

Get unix timestamp from a string timestamp in date_format.

Parameters:

Name Description
str_ts string timestamp in date_format
date_format datetime time stamp format

Returns:

Type Description
float unix timestamp (seconds)
View Source
def get_unix(str_ts, date_format):
    """Parse a string timestamp in date_format and convert it to a unix timestamp.

    When `date_format` carries no UTC offset, the parsed datetime is naive and `datetime.timestamp()`
    interprets it in the local timezone.

    Args:
        str_ts: string timestamp in `date_format`
        date_format: datetime time stamp format

    Returns:
        float: unix timestamp in seconds

    """
    parsed = datetime.strptime(str_ts, date_format)
    return parsed.timestamp()

json_dumps_compact⚓︎

def json_dumps_compact(
    data
)

Format provided dictionary into compact JSON. Lists will be in one line rather than split on new lines.

Parameters:

Name Description
data JSON-serializable dictionary

Returns:

Type Description
str JSON-formatted string with lists compacted into a single line
View Source
def json_dumps_compact(data):   # noqa: CCR001
    """Format provided dictionary into compact JSON. Lists will be in one line rather than split on new lines.

    Only lists that are direct values of `data` are compacted; lists nested inside other values keep the
    standard indented layout. Each compacted list is produced by the JSON encoder, so the output is valid JSON
    even when the data contains apostrophes, brackets, backticks, booleans, or None.

    Args:
        data: JSON-serializable dictionary

    Returns:
        str: JSON-formatted string with lists compacted into a single line

    Raises:
        TypeError: if any key or value (including list items) is not JSON-serializable

    """
    separators = (',', ': ')
    # Extend the placeholder prefix until it provably appears nowhere in the serialized keys or values
    prefix = '@@list'
    serialized = json.dumps(data)
    while prefix in serialized:
        prefix += '@'

    clean_data = {}
    compact_lists = {}
    for index, (key, raw) in enumerate(data.items()):
        if isinstance(raw, list):
            placeholder = f'{prefix}{index}'
            # The quoted placeholder is what appears in the indented output and is swapped for the one-line list
            compact_lists[f'"{placeholder}"'] = json.dumps(raw, separators=separators)
            clean_data[key] = placeholder
        else:
            clean_data[key] = raw

    raw_json = json.dumps(clean_data, indent=4, separators=separators, sort_keys=True)
    for quoted_placeholder, compact_list in compact_lists.items():
        raw_json = raw_json.replace(quoted_placeholder, compact_list)
    return raw_json

list_sql_tables⚓︎

def list_sql_tables(
    db_path
)

Return all table names from the SQL database.

Parameters:

Name Description
db_path path to SQLite database file

Returns:

Type Description
list of unique table names in the SQL database
View Source
def list_sql_tables(db_path):
    """Return all table names from the SQL database.

    Args:
        db_path: path to SQLite database file

    Returns:
        list: of unique table names in the SQL database

    """
    with SQLConnection(db_path) as conn:
        cursor = conn.cursor()
        # SQL string literals take single quotes; a double-quoted "table" is an identifier that SQLite only
        # treats as a string through a legacy fallback, which can be disabled at compile time
        cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        # Each fetched row is a 1-tuple holding the table name
        return [names[0] for names in cursor.fetchall()]

uniq_table_id⚓︎

def uniq_table_id()

Return a unique table ID based on the current time in nanoseconds.

Returns:

Type Description
str in format U<timestamp_ns>
View Source
def uniq_table_id():
    """Return a table ID built from the current time in nanoseconds.

    Uniqueness relies on the clock value differing between calls.

    Returns:
        str: in format `U<timestamp_ns>`

    """
    return 'U' + str(time.time_ns())

validate⚓︎

def validate(
    document,
    schema,
    **validator_kwargs
)

Validate a data structure. Return errors if any found.

Cerberus Documentation: https://docs.python-cerberus.org/en/stable/validation-rules.html

Parameters:

Name Description
document data structure to validate
schema expected structure
validator_kwargs additional keyword arguments for Validator class

Returns:

Type Description
list validation errors
View Source
def validate(document, schema, **validator_kwargs):
    """Check a data structure against a Cerberus schema and report what failed.

    Cerberus Documentation: https://docs.python-cerberus.org/en/stable/validation-rules.html

    Args:
        document: data structure to validate
        schema: expected structure
        validator_kwargs: additional keyword arguments for Validator class

    Returns:
        the `errors` attribute of the Validator after validation (see the Cerberus documentation for its structure)

    """
    checker = Validator(schema, **validator_kwargs)
    # Only the recorded errors are reported; the return value of `validate` is not used
    checker.validate(document)
    return checker.errors

write_csv⚓︎

def write_csv(
    csv_path,
    rows
)

Write a csv file with appropriate line terminator and encoding.

Parameters:

Name Description
csv_path path to CSV file
rows list of lists to write to CSV file
View Source
def write_csv(csv_path, rows):
    """Write rows to a comma-delimited, UTF-8 encoded CSV file.

    The file is opened with `newline='\\n'`, so Python does not translate the line terminators emitted by
    `csv.writer`. Fields are quoted only when required (`csv.QUOTE_MINIMAL`).

    Args:
        csv_path: path to CSV file
        rows: list of lists to write to CSV file

    """
    with open(csv_path, 'w', newline='\n', encoding='utf-8') as handle:
        csv.writer(handle, delimiter=',', quoting=csv.QUOTE_MINIMAL).writerows(rows)

write_pretty_json⚓︎

def write_pretty_json(
    filename,
    obj
)

Write indented JSON file.

Parameters:

Name Description
filename Path or plain string filename to write (should end with .json)
obj JSON object to write
View Source
def write_pretty_json(filename, obj):
    """Serialize an object as 4-space indented JSON and write it to a file.

    The file is written with `Path.write_text` using its default encoding, replacing any existing content.

    Args:
        filename: Path or plain string filename to write (should end with `.json`)
        obj: JSON object to write

    """
    target = Path(filename)
    # Serialization completes before the file is opened, so a failure here does not touch the file
    pretty = json.dumps(obj, indent=4, separators=(',', ': '))
    target.write_text(pretty)

Classes⚓︎

SQLConnection⚓︎

class SQLConnection(
    db_path
)
View Source
class SQLConnection(ContextDecorator):
    """Open a SQLite connection on entry and close it on exit.

    Usable as a `with` block or, through `ContextDecorator`, as a function decorator. Closing happens without
    any commit being issued here, so callers must commit their own changes before leaving the block.
    """

    def __init__(self, db_path):
        """Store the database location; no connection is opened yet.

        Args:
            db_path: Path to a SQLite file

        """
        self.db_path = db_path
        self.conn = None

    def __enter__(self):
        """Open the database and keep a reference for `__exit__`.

        Returns:
            the connection object returned by `sqlite3.connect`

        """
        connection = sqlite3.connect(self.db_path)
        self.conn = connection
        return connection

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection opened in `__enter__`; exceptions from the block are not suppressed."""  # noqa: DAR101
        self.conn.close()

Ancestors (in MRO)⚓︎

  • contextlib.ContextDecorator

Last update: August 5, 2022
Created: August 5, 2022